Here I study how biomass density changes across treatments in the PatchSizePilot. In particular, I’m studying how biomass density changes across:
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## here() starts at /Users/ema/github/PatchSizePilot
culture_info = read.csv(here("data", "PatchSizePilot_culture_info.csv"), header = TRUE)
load(here("data", "population", "t0.RData")); t0 = pop_output
load(here("data", "population", "t1.RData")); t1 = pop_output
load(here("data", "population", "t2.RData")); t2 = pop_output
load(here("data", "population", "t3.RData")); t3 = pop_output
load(here("data", "population", "t4.RData")); t4 = pop_output
load(here("data", "population", "t5.RData")); t5 = pop_output
load(here("data", "population", "t6.RData")); t6 = pop_output
load(here("data", "population", "t7.RData")); t7 = pop_output
rm(pop_output)
t0$time = NA
t1$time = NA
t0$replicate_video = 1:12 #In t1 I took 12 videos of a single
t1$replicate_video = 1 #In t1 I took only 1 video/culture
t2$replicate_video = 1 #In t2 I took only 1 video/culture
t3$replicate_video = 1 #In t3 I took only 1 video/culture
t4$replicate_video = 1 #In t4 I took only 1 video/culture
t5$replicate_video = 1 #In t5 I took only 1 video/culture
t6$replicate_video = t6$replicate; t6$replicate = NULL
t7$replicate_video = t7$replicate; t7$replicate = NULL
#Create an elongated version of t0 so that each of the 110 cultures can have 12 video replicates at t0.
elongating_t0 = NULL
for (video in 1:nrow(t0)){
for (ID in 1:nrow(culture_info)) {
elongating_t0 = rbind(elongating_t0, t0[video,])
}
}
ID_vector = rep(1:nrow(culture_info), times = nrow(t0))
elongating_t0$culture_ID = ID_vector
t0 = merge(culture_info,elongating_t0, by="culture_ID")
t1 = merge(culture_info,t1,by="culture_ID")
t2 = merge(culture_info,t2,by="culture_ID")
t3 = merge(culture_info,t3,by="culture_ID")
t4 = merge(culture_info,t4,by="culture_ID")
t5 = merge(culture_info,t5,by="culture_ID")
t6 = merge(culture_info,t6,by="culture_ID")
t7 = merge(culture_info,t7,by="culture_ID")
ds = rbind(t0, t1, t2, t3, t4, t5, t6, t7); rm(elongating_t0, t0, t1, t2, t3, t4, t5, t6, t7)
# Switch from data point to day
ds$day = ds$time_point; ds$time_point = NULL
ds$day[ds$day=="t0"] = "0"
ds$day[ds$day=="t1"] = "4"
ds$day[ds$day=="t2"] = "8"
ds$day[ds$day=="t3"] = "12"
ds$day[ds$day=="t4"] = "16"
ds$day[ds$day=="t5"] = "20"
ds$day[ds$day=="t6"] = "24"
ds$day[ds$day=="t7"] = "28"
ds$day = as.numeric(ds$day)
ds = ds %>%
select(culture_ID, patch_size, disturbance, metaecosystem_type, bioarea_per_volume, replicate_video, day, metaecosystem, system_nr, eco_metaeco_type)
ds = ds[, c("culture_ID", "system_nr", "disturbance", "day", "patch_size", "metaecosystem", "metaecosystem_type", "eco_metaeco_type", "replicate_video","bioarea_per_volume")]
ds$eco_metaeco_type = factor(ds$eco_metaeco_type, levels=c('S', 'S (S_S)', 'S (S_L)', 'M', 'M (M_M)', 'L', 'L (L_L)', 'L (S_L)'))
ds_regional = ds %>%
group_by(culture_ID, system_nr, disturbance, day, patch_size, metaecosystem_type) %>%
summarise(patch_mean_bioarea_across_videos = mean(bioarea_per_volume)) %>%
group_by(system_nr, disturbance, day,metaecosystem_type) %>%
summarise(regional_mean_bioarea = mean(patch_mean_bioarea_across_videos))
Let’s now save all the plots together in a single image. As the single image is too large for R markdown, it cannot be displayed here.
grid = grid.arrange(p[[1]],p[[3]],p[[5]],p[[2]],p[[4]],p[[6]],
ncol=3, nrow=2,
top = textGrob("Low disturbance",gp=gpar(fontsize=20,font=3)))
ggsave(here("results", "biomass", "Clean_biomass_low.jpg"), grid, width = 22, height = 13)
Let’s save also here for high disturbance.
biomass_plots("high")
grid = grid.arrange(p[[1]],p[[3]],p[[5]],p[[2]],p[[4]],p[[6]],
ncol=3, nrow=2,
top = textGrob("High disturbance",gp=gpar(fontsize=20,font=3)))
ggsave(here("results", "biomass", "Clean_biomass_high.jpg"), grid, width = 22, height = 13)
To build the mixed effect models we will use the R package lme4. See page 6 of this PDF to know more about the syntaxis of this package.
To do model diagnostics, I’m going to look at the following two plots (as suggested by Zuur et al. (2009), page 487):
When modelling the regional biomass, we want to understand if the regional biomass produced by an ecosystem with a small and a large patch (metaecosystem_type = S_L) is lower than the regional biomass produced by an ecosystem with two medium patches (metaecosystem_type = M_M).
First of all, let’s produce a data set including the regional biomass of all our meta ecosystems. In this data set, not only we want to have the regional biomass of the meta-ecosystems (averaged first across videos and then across patches) but also:
We want to include only the meta-ecosystems in which patches had both medium size (metaecosystem_type = M_M) and meta-ecosystems in which patches had one a small size and the other large size (metaecosystem_type = S_L).
We want to take off the first time point (day = 0). This is because all the meta-ecosystems had the same regional biomass, which was a construct of our experimental design.
ds_regional = ds_regional %>%
filter (metaecosystem_type == "M_M" | metaecosystem_type == "S_L", day != 0)
First of all, let’s include time as a random variable. Let’s start from the largest mixed effect model makes sense to construct. This will include:
We are actually going to construct two full models. One with the correlated and one with the uncorrelated random slopes and intercept, the other without.
#Uncorrelated intercepts and slopes
full_model = lmer(regional_mean_bioarea ~ metaecosystem_type + disturbance + metaecosystem_type * disturbance + (metaecosystem_type || day) + (disturbance || day) + (metaecosystem_type*disturbance || day) + (metaecosystem_type || system_nr) + (disturbance || system_nr) + (metaecosystem_type*disturbance || system_nr), data = ds_regional, REML = FALSE)
#Correlated intercepts and slopes
full_model_correlated = lmer(regional_mean_bioarea ~ metaecosystem_type + disturbance + metaecosystem_type * disturbance + (metaecosystem_type | day) + (disturbance | day) + (metaecosystem_type*disturbance | day) + (metaecosystem_type | system_nr) + (disturbance | system_nr) + (metaecosystem_type*disturbance | system_nr), data = ds_regional, REML = FALSE)
anova(full_model, full_model_correlated)
## Data: ds_regional
## Models:
## full_model_correlated: regional_mean_bioarea ~ metaecosystem_type + disturbance + metaecosystem_type * disturbance + (metaecosystem_type | day) + (disturbance | day) + (metaecosystem_type * disturbance | day) + (metaecosystem_type | system_nr) + (disturbance | system_nr) + (metaecosystem_type * disturbance | system_nr)
## full_model: regional_mean_bioarea ~ metaecosystem_type + disturbance + metaecosystem_type * disturbance + ((1 | day) + (0 + metaecosystem_type | day)) + ((1 | day) + (0 + disturbance | day)) + ((1 | day) + (0 + metaecosystem_type | day) + (0 + disturbance | day) + (0 + metaecosystem_type:disturbance | day)) + ((1 | system_nr) + (0 + metaecosystem_type | system_nr)) + ((1 | system_nr) + (0 + disturbance | system_nr)) + ((1 | system_nr) + (0 + metaecosystem_type | system_nr) + (0 + disturbance | system_nr) + (0 + metaecosystem_type:disturbance | system_nr))
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## full_model_correlated 37 2221.3 2330.2 -1073.7 2147.3
## full_model 55 2257.3 2419.1 -1073.7 2147.3 0 18 1
Although the AIC for the model with correlated slopes is lower, there is not statistical significance between the two models. Let’s then keep as a full model the one with non correlated slopes (full_model).
no_system_nr_model = lmer(regional_mean_bioarea ~ metaecosystem_type + disturbance + metaecosystem_type * disturbance + (metaecosystem_type || day) + (disturbance || day) + (metaecosystem_type*disturbance || day), data = ds_regional, REML = FALSE)
anova(full_model, no_system_nr_model)
## Data: ds_regional
## Models:
## no_system_nr_model: regional_mean_bioarea ~ metaecosystem_type + disturbance + metaecosystem_type * disturbance + ((1 | day) + (0 + metaecosystem_type | day)) + ((1 | day) + (0 + disturbance | day)) + ((1 | day) + (0 + metaecosystem_type | day) + (0 + disturbance | day) + (0 + metaecosystem_type:disturbance | day))
## full_model: regional_mean_bioarea ~ metaecosystem_type + disturbance + metaecosystem_type * disturbance + ((1 | day) + (0 + metaecosystem_type | day)) + ((1 | day) + (0 + disturbance | day)) + ((1 | day) + (0 + metaecosystem_type | day) + (0 + disturbance | day) + (0 + metaecosystem_type:disturbance | day)) + ((1 | system_nr) + (0 + metaecosystem_type | system_nr)) + ((1 | system_nr) + (0 + disturbance | system_nr)) + ((1 | system_nr) + (0 + metaecosystem_type | system_nr) + (0 + disturbance | system_nr) + (0 + metaecosystem_type:disturbance | system_nr))
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## no_system_nr_model 30 2217.1 2305.4 -1078.5 2157.1
## full_model 55 2257.3 2419.1 -1073.7 2147.3 9.7713 25 0.9972
The difference between the two doesn’t seem to be significant. Therefore, let’s throw out the system_nr from our model.
fixed_effects_model = lm(regional_mean_bioarea ~ metaecosystem_type + disturbance, data = ds_regional)
anova(no_system_nr_model, fixed_effects_model)
## Data: ds_regional
## Models:
## fixed_effects_model: regional_mean_bioarea ~ metaecosystem_type + disturbance
## no_system_nr_model: regional_mean_bioarea ~ metaecosystem_type + disturbance + metaecosystem_type * disturbance + ((1 | day) + (0 + metaecosystem_type | day)) + ((1 | day) + (0 + disturbance | day)) + ((1 | day) + (0 + metaecosystem_type | day) + (0 + disturbance | day) + (0 + metaecosystem_type:disturbance | day))
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## fixed_effects_model 4 2363.1 2374.9 -1177.6 2355.1
## no_system_nr_model 30 2217.1 2305.4 -1078.5 2157.1 198.02 26 < 2.2e-16
##
## fixed_effects_model
## no_system_nr_model ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The full model is better, as we expected. Time plays such an important part in explaining the way that biomass changes.
no_interaction_model = lmer(regional_mean_bioarea ~ metaecosystem_type + disturbance + (metaecosystem_type || day) + (disturbance || day), data = ds_regional, REML = FALSE)
anova(no_system_nr_model, no_interaction_model)
## Data: ds_regional
## Models:
## no_interaction_model: regional_mean_bioarea ~ metaecosystem_type + disturbance + ((1 | day) + (0 + metaecosystem_type | day)) + ((1 | day) + (0 + disturbance | day))
## no_system_nr_model: regional_mean_bioarea ~ metaecosystem_type + disturbance + metaecosystem_type * disturbance + ((1 | day) + (0 + metaecosystem_type | day)) + ((1 | day) + (0 + disturbance | day)) + ((1 | day) + (0 + metaecosystem_type | day) + (0 + disturbance | day) + (0 + metaecosystem_type:disturbance | day))
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## no_interaction_model 12 2183.2 2218.4 -1079.6 2159.2
## no_system_nr_model 30 2217.1 2305.4 -1078.5 2157.1 2.0436 18 1
There is no statistical significance between the two models. Therefore, let’s drop the interaction between disturbance and meta-ecosystem type.
no_slopes_model = lmer(regional_mean_bioarea ~ metaecosystem_type + disturbance + (1 | day), data = ds_regional, REML = FALSE)
anova(no_interaction_model, no_slopes_model)
## Data: ds_regional
## Models:
## no_slopes_model: regional_mean_bioarea ~ metaecosystem_type + disturbance + (1 | day)
## no_interaction_model: regional_mean_bioarea ~ metaecosystem_type + disturbance + ((1 | day) + (0 + metaecosystem_type | day)) + ((1 | day) + (0 + disturbance | day))
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## no_slopes_model 5 2185.8 2200.5 -1087.9 2175.8
## no_interaction_model 12 2183.2 2218.4 -1079.6 2159.2 16.598 7 0.02018 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Yes. We should keep the random slopes. Well, it seems like this is our best model then.
After model selection, we determined that the best model is the following one:
regional_mean_bioarea ~ metaecosystem_type + disturbance + (metaecosystem_type || day) + (disturbance || day)
Let’s call it then best_model to don’t get confused.
best_model = no_interaction_model
Let’s now calculate the statistical significance and effect size of
the best model. The marginal and conditional r squared are calculated
using the package MuMIn. For the coding and interpretation
of these r squared check the documentation
for the r.squaredGLMM function.
model.null = lmer(regional_mean_bioarea ~ (1 | day), data = ds_regional, REML = FALSE)
r2_best = r.squaredGLMM(best_model)
## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help page.
r2_best = round(r2_best, digits = 3)
anova = anova(best_model, model.null)
p_best = anova$`Pr(>Chisq)`[2]
p_best = round(p_best, digits = 5)
if (p_best < 0.00001) {
p_best = "< 0.00001"
}
metaecosystem_type_null = lmer(regional_mean_bioarea ~ disturbance + (disturbance || day),
data = ds_regional,
REML = FALSE,
control = lmerControl(optimizer ='optimx',
optCtrl=list(method='L-BFGS-B')))
r2_no_metaecosystem_type = r.squaredGLMM(metaecosystem_type_null)
r2_no_metaecosystem_type = round(r2_no_metaecosystem_type, digits = 3)
anova = anova(best_model, metaecosystem_type_null)
p_no_metaecosystem_type = anova$`Pr(>Chisq)`[2]
p_no_metaecosystem_type = round(p_no_metaecosystem_type, digits = 5)
if (p_no_metaecosystem_type < 0.00001) {
p_no_metaecosystem_type = "< 0.00001"
}
disturbance_null = lmer(regional_mean_bioarea ~ metaecosystem_type + (metaecosystem_type || day),
data = ds_regional,
REML = FALSE,
control = lmerControl(optimizer ='optimx',
optCtrl=list(method='L-BFGS-B')))
r2_no_disturbance = r.squaredGLMM(disturbance_null)
r2_no_disturbance = round(r2_no_disturbance, digits = 3)
anova = anova(best_model, disturbance_null)
p_no_disturbance = anova$`Pr(>Chisq)`[2]
p_no_disturbance = round(p_no_disturbance, digits = 5)
if (p_no_disturbance < 0.00001) {
p_no_disturbance = "< 0.00001"
}
The effect size and significance of our results can be found in the following table.
| Model | Marginal R2 | Marginal R2 of the best model explained | Conditional R2 | Conditional R2 of the best model explained | P-value |
|---|---|---|---|---|---|
| Best model | 0.077 | / | 0.872 | / | < 0.00001 |
| Best model without meta-ecosystem type | 0.039 | 0.038 | 0.746 | 0.126 | < 0.00001 |
| Best model without disturbance | 0.06 | 0.017 | 0.77 | 0.102 | < 0.00001 |
This means that meta-ecosystem type should explain more than disturbance regional biomass dynamics. However, the effect size isn’t that high (0.038). Because of this, we thought we should fit a function to the time dynamics of the regional biomass.
plot(full_model)
qqnorm(resid(full_model))
plot(full_model_correlated)
qqnorm(resid(full_model_correlated))
plot(no_system_nr_model)
qqnorm(resid(no_system_nr_model))
par(mfrow=c(2,3))
plot(fixed_effects_model, which = 1:5)
par(mfrow=c(1,1))
plot(no_system_nr_model)
qqnorm(resid(no_system_nr_model))
plot(no_slopes_model)
qqnorm(resid(no_slopes_model))
plot(metaecosystem_type_null)
qqnorm(resid(metaecosystem_type_null))
plot(disturbance_null)
qqnorm(resid(disturbance_null))